library(DESeq2)
library(tximport)
library(tidyverse)
library(gridExtra)
library(ensembldb)
library(EnsDb.Mmusculus.v79)
library(grid)
library(ggplot2)
library(lattice)
library(reshape)
library(mixOmics)
library(gplots)
library(RColorBrewer)
library(readr)
library(dplyr)
library(VennDiagram)
library(clusterProfiler)
library(DOSE)
library(org.Mm.eg.db)
library(pathview)
library(AnnotationDbi)
library(apeglm)
library(gtools)
library(knitr)
library(biomaRt)
library(fgsea)
library(pheatmap)
library(enrichplot)
p_value <- 0.05
LFC_CUTOFF <- 1
This is an RNASeq dataset generated by Moon Hee Yang and the Broad
Samples used here are colonic tumor with different alleles of K-Ras.
Fabp-Cre, Apc fl/+ colonic tumor with K-RasWT, K-RasG12D, K-RasG12D/K104Q
Set working directory to the folder that contains only gene count
sf files
# Generate a tx2gene object for matrix generation
edb <- EnsDb.Mmusculus.v79
transcriptsID <- as.data.frame(transcripts(edb))
tx2gene <- as.data.frame(cbind(transcriptsID$tx_id, transcriptsID$gene_id))
# Generate DESeqData using the counting result generated using Salmon
countFiles = list.files("Data/Gene Counts/", pattern=".sf$", full.names = TRUE)
basename(countFiles)
## [1] "G12D-1_MY7191.sf" "G12D-2_MHY8102.sf" "G12D-3_MY6962.sf"
## [4] "G12DK104Q-1_MY6895.sf" "G12DK104Q-2_MY7438.sf" "G12DK104Q-3_MY7069.sf"
## [7] "WT-1_SRS638.sf" "WT-2_SRS1059.sf" "WT-3_YW2801.sf"
names(countFiles) <- c('G12D_1','G12D_2','G12D_3','G12DK104Q_1','G12DK104Q_2','G12DK104Q_3','WT_1','WT_2','WT_3')
txi.salmon <- tximport(countFiles, type = "salmon", tx2gene = tx2gene, ignoreTxVersion = TRUE)
## reading in files with read_tsv
## 1 2 3 4 5 6 7 8 9
## transcripts missing from tx2gene: 30916
## summarizing abundance
## summarizing counts
## summarizing length
DESeqsampletable <- data.frame(condition = c('control','control','control','experimental','experimental','experimental','experimental','experimental','experimental'))
DESeqsampletable$name <- c('G12D_1','G12D_2','G12D_3','G12DK104Q_1','G12DK104Q_2','G12DK104Q_3','WT_1','WT_2','WT_3')
DESeqsampletable$kras <- c('G12D','G12D','G12D','G12DK104Q','G12DK104Q','G12DK104Q','WT','WT','WT')
DESeqsampletable$gender <- c('M','M','M','F','F','M','M','M','M')
rownames(DESeqsampletable) <- colnames(txi.salmon$counts)
ddsHTSeq<- DESeqDataSetFromTximport(txi.salmon, DESeqsampletable, ~ gender + condition)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## using counts and average transcript lengths from tximport
ddsHTSeq_norm <- DESeq(ddsHTSeq)
## estimating size factors
## using 'avgTxLength' from assays(dds), correcting for library size
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
size_factor <- normalizationFactors(ddsHTSeq_norm)
ddsHTSeq_analysis <- lfcShrink(ddsHTSeq_norm, coef = "condition_experimental_vs_control", type = "apeglm")
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
DESeq2::plotMA(ddsHTSeq_analysis)
Data is transformed and pseudocount is calculated.
rawCountTable <- as.data.frame(DESeq2::counts(ddsHTSeq_norm, normalize = TRUE))
pseudoCount = log2(rawCountTable + 1)
grid.arrange(
ggplot(pseudoCount, aes(x= WT_1)) + xlab(expression(log[2](count + 1))) + ylab("Number of Genes") +
geom_histogram(colour = "white", fill = "#525252", binwidth = 0.6) + labs(title = "WT_1"),
ggplot(pseudoCount, aes(x= WT_2)) + xlab(expression(log[2](count + 1))) + ylab("Number of Genes") +
geom_histogram(colour = "white", fill = "#525252", binwidth = 0.6) + labs(title = "WT_2"),
ggplot(pseudoCount, aes(x= WT_3)) + xlab(expression(log[2](count + 1))) + ylab("Number of Genes") +
geom_histogram(colour = "white", fill = "#525252", binwidth = 0.6) + labs(title = "WT_3"),
ggplot(pseudoCount, aes(x= G12D_1)) + xlab(expression(log[2](count + 1))) + ylab("Number of Genes") +
geom_histogram(colour = "white", fill = "#525252", binwidth = 0.6) + labs(title = "G12D_1"),
ggplot(pseudoCount, aes(x= G12D_2)) + xlab(expression(log[2](count + 1))) + ylab("Number of Genes") +
geom_histogram(colour = "white", fill = "#525252", binwidth = 0.6) + labs(title = "G12D_2"),
ggplot(pseudoCount, aes(x= G12D_3)) + xlab(expression(log[2](count + 1))) + ylab("Number of Genes") +
geom_histogram(colour = "white", fill = "#525252", binwidth = 0.6) + labs(title = "G12D_3"),
ggplot(pseudoCount, aes(x= G12DK104Q_1)) + xlab(expression(log[2](count + 1))) + ylab("Number of Genes") +
geom_histogram(colour = "white", fill = "#525252", binwidth = 0.6) + labs(title = "G12DK104Q_1"),
ggplot(pseudoCount, aes(x= G12DK104Q_2)) + xlab(expression(log[2](count + 1))) + ylab("Number of Genes") +
geom_histogram(colour = "white", fill = "#525252", binwidth = 0.6) + labs(title = "G12DK104Q_2"),
ggplot(pseudoCount, aes(x= G12DK104Q_3)) + xlab(expression(log[2](count + 1))) + ylab("Number of Genes") +
geom_histogram(colour = "white", fill = "#525252", binwidth = 0.6) + labs(title = "G12DK104Q_3"),
nrow = 3)
Check on the gene count distribution across all genes.
#Boxplots
suppressMessages(df <- melt(pseudoCount, variable_name = "Samples"))
df <- df %>% separate(Samples, into = c("Condition", NA), sep = "_", remove = FALSE)
ggplot(df, aes(x=Samples, y=value, fill = Condition)) + geom_boxplot() + xlab("") +
ylab(expression(log[2](count+1))) + scale_fill_manual(values = c("#619CFF", "#F564E3", "#2EF23C")) + theme(axis.text.x = element_text(angle = 90, hjust = 1))
#Histograms and density plots
ggplot(df, aes(x=value, colour = Samples, fill = Samples)) + ylim(c(0, 0.25)) +
geom_density(alpha = 0.2, size = 1.25) + facet_wrap(~ Condition) +
theme(legend.position = "top") + xlab(expression(log[2](count+1)))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
This is the sanity check step to confirm that under a un-supervised
clustering, WT and G12D samples will cluster together. For some reason,
the code is giving error when try to plot this heatmap in RStudio, so I
created a pdf file that contains the heatmap in the Analysis folder
named Hierchical Clustering.pdf
ddsHTSeq_transform <- varianceStabilizingTransformation(ddsHTSeq_norm)
rawCountTable_transform <- as.data.frame(assay(ddsHTSeq_transform))
rawCountTable_transform_all <- rawCountTable_transform
pseudoCount_transform = log2(rawCountTable_transform + 1)
mat.dist = pseudoCount_transform
mat.dist = as.matrix(dist(t(mat.dist)))
mat.dist = mat.dist/max(mat.dist)
png('Figure/Hierchical_Clustering.png')
cim(mat.dist, symkey = FALSE, margins = c(8, 8))
suppressMessages(dev.off())
## quartz_off_screen
## 2
include_graphics("Figure/Hierchical_Clustering.png")
I performed PCA analysis on all datasets to confirm that samples from
the same sample group group together. This step has to be performed
using varianceStabelizingTransformed dataset, so that the
high variance in genes with low counts will not skew the data.
The top 500 most variable genes are selected for PCA analysis.
plotPCA(ddsHTSeq_transform, intgroup = "kras", ntop = 500)
rawCountTable_no_normalization <- as.data.frame(DESeq2::counts(ddsHTSeq))
keep <- rowMeans(rawCountTable[,1:3]) > 50 | rowMeans(rawCountTable[,4:6]) > 50 | rowMeans(rawCountTable[,7:9]) > 50
filterCount <- pseudoCount[keep,]
rawCountTable_transform_all <- rawCountTable_transform_all[keep,]
keep_genes <- rownames(filterCount)
df <- melt(filterCount, variable_name = "Samples")
## Using as id variables
df <- df %>% separate(Samples, into = c("Condition", NA), sep = "_", remove = FALSE)
detected_raw_count_norm <- rawCountTable[keep,]
write.csv(detected_raw_count_norm, "Result/normalized_gene_counts.csv")
rawCountTable_no_normalization <- rawCountTable_no_normalization[keep,]
write.csv(rawCountTable_no_normalization, "Result/raw_gene_counts.csv")
ggplot(df, aes(x=value, colour = Samples, fill = Samples)) + ylim(c(0, 0.25)) +
geom_density(alpha = 0.2, size = 1.25) + facet_wrap(~ Condition) +
theme(legend.position = "top") + xlab(expression(log[2](count+1)))
countFiles_ss <- countFiles[c(7,8,9,1,2,3)]
txi.salmon <- tximport(countFiles_ss, type = "salmon", tx2gene = tx2gene, ignoreTxVersion = TRUE)
## reading in files with read_tsv
## 1 2 3 4 5 6
## transcripts missing from tx2gene: 30916
## summarizing abundance
## summarizing counts
## summarizing length
DESeqsampletable_ss <- DESeqsampletable[c(7,8,9,1,2,3),]
DESeqsampletable_ss$condition <- c("control", "control", "control", "experimental", "experimental", "experimental")
ddsHTSeq<- DESeqDataSetFromTximport(txi.salmon, DESeqsampletable_ss, ~ condition)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## using counts and average transcript lengths from tximport
raw_ct <- counts(ddsHTSeq)
norm_factor <- size_factor[rownames(size_factor) %in% rownames(raw_ct), c(7,8,9,1,2,3)]
normalizationFactors(ddsHTSeq) <- norm_factor
ddsHTSeq_norm <- DESeq(ddsHTSeq)
## using pre-existing normalization factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
ddsHTSeq_analysis <- lfcShrink(ddsHTSeq_norm, coef = "condition_experimental_vs_control", type = "apeglm")
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
DESeq2::plotMA(ddsHTSeq_analysis)
ddsHTSeq_transform <- varianceStabilizingTransformation(ddsHTSeq_norm)
rawCountTable_transform <- as.data.frame(assay(ddsHTSeq_transform))
pseudoCount_transform = log2(rawCountTable_transform + 1)
mat.dist = pseudoCount_transform
mat.dist = as.matrix(dist(t(mat.dist)))
mat.dist = mat.dist/max(mat.dist)
png('Figure/Hierchical_Clustering_G12D_vs_WT.png')
cim(mat.dist, symkey = FALSE, margins = c(6, 6))
suppressMessages(dev.off())
## quartz_off_screen
## 2
include_graphics("Figure/Hierchical_Clustering_G12D_vs_WT.png")
plotPCA(ddsHTSeq_transform, intgroup = "kras", ntop = 500)
ensembl = useMart("ensembl", dataset="mmusculus_gene_ensembl")
symbols = getBM(attributes=c("entrezgene_id", "mgi_symbol", "ensembl_gene_id"), mart=ensembl) %>%
mutate(entrezgene_id=as.character(entrezgene_id))
dif_analysis <- as.data.frame(ddsHTSeq_analysis)
dif_analysis <- dif_analysis[rownames(dif_analysis) %in% keep_genes, ]
colnames(dif_analysis) <- paste0("G12D-WT_", colnames(dif_analysis))
dif_analysis <- rownames_to_column(dif_analysis, var = "Ensembl_ID")
rawCountTable <- as.data.frame(DESeq2::counts(ddsHTSeq_norm, normalize = TRUE))
detected_rawCountTable <- rawCountTable[rownames(rawCountTable) %in% keep_genes,]
dif_analysis$WT_mean <- rowMeans(detected_rawCountTable[,1:3])
dif_analysis$G12D_mean <- rowMeans(detected_rawCountTable[,4:6])
# calculate signal-to-noise ratio for GSEA later
s2n <- function(num_list, cond_1 = c(4:6), cond_2 = c(1:3)) {
mean1 <- mean(num_list[cond_1])
if (mean1 == 0) {
mean1 = 1
}
mean2 <- mean(num_list[cond_2])
if (mean2 == 0) {
mean2 = 1
}
sd1 <- sd(num_list[cond_1])
sd2 <- sd(num_list[cond_2])
sd1 <- min(sd1, 0.2*abs(mean1))
sd2 <- min(sd2, 0.2*abs(mean2))
s2nvalue <- (mean1-mean2)/(sd1+sd2)
return(s2nvalue)
}
dif_analysis$'G12D-WT_s2n' <- apply(detected_rawCountTable,1,s2n,cond_1 = c(4:6),cond_2 = c(1:3))
dif_analysis <- dif_analysis %>% left_join(symbols, by = c("Ensembl_ID" = "ensembl_gene_id")) %>% as_tibble()
dif_analysis <- dif_analysis[!duplicated(dif_analysis$Ensembl_ID), ]
dif_analysis <- dif_analysis[,c(1,10,11,2:9)]
suppressMessages(library(mosaic))
rawCountTable_transform_detected <- rawCountTable_transform[rownames(rawCountTable_transform) %in% keep_genes,]
sig_dif <- dif_analysis %>% dplyr::filter(`G12D-WT_padj`< p_value) %>% dplyr::filter(`G12D-WT_log2FoldChange` > LFC_CUTOFF | `G12D-WT_log2FoldChange` < -LFC_CUTOFF) %>% dplyr::select(Ensembl_ID) %>% unique() %>% unlist()
sig_count <- rawCountTable_transform_detected[rownames(rawCountTable_transform_detected) %in% sig_dif, ]
sig_count_z <- t(apply(sig_count,1,zscore))
col_annot <- DESeqsampletable_ss[,c(1,3)]
colnames(col_annot) <- c("Condition", "Kras_status")
p <- pheatmap(sig_count_z, show_rownames = FALSE, annotation_col = col_annot, cluster_cols = FALSE)
ggsave(filename = "Heatmap_G12D_vs_WT.png", p, width = 6, height = 10, path = "Figure")
include_graphics("Figure/Heatmap_G12D_vs_WT.png")
# Volcano Plot
ggplot(dif_analysis, aes(x = `G12D-WT_log2FoldChange`, y = -log(`G12D-WT_padj`,10))) +
xlab("LFC") + ylab("-log10(P value)") +
geom_point(data = dif_analysis, alpha = 0.5, size = 1, color = "black") +
geom_point(data = subset(dif_analysis, `G12D-WT_padj` < p_value & `G12D-WT_log2FoldChange` > LFC_CUTOFF), alpha = 0.5, size = 1, color = "red") +
geom_point(data = subset(dif_analysis, `G12D-WT_padj` < p_value & `G12D-WT_log2FoldChange` < -LFC_CUTOFF), alpha = 0.5, size = 1, color = "blue") +
geom_hline(yintercept = -log(p_value,10)) +
geom_vline(xintercept = c(-LFC_CUTOFF,LFC_CUTOFF)) +
labs(title = "G12D vs WT Volcano Plot")
## Warning: Removed 101 rows containing missing values (`geom_point()`).
Classic GO analysis is performed here for all DE genes detected in this dataset. The reference list is list of genes detected in RNASeq. Three categories of GO terms are tested here, including biological process, molecular function and cellular component.
target_gene <- sig_dif
detected_gene <- rownames(detected_rawCountTable)
# Run GO enrichment analysis for biological process
ego_BP <- enrichGO(gene = target_gene,
universe = detected_gene,
keyType = "ENSEMBL",
OrgDb = org.Mm.eg.db,
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = p_value,
readable = TRUE)
ego_BP <- pairwise_termsim(ego_BP)
# Output results from GO analysis to a table
cluster_summary_BP <- data.frame(ego_BP)
dir.create("Result/GO", showWarnings = F)
write.csv(cluster_summary_BP, "Result/GO/G12D-WT_GO analysis_BP.csv")
# Run GO enrichment analysis for molecular function
ego_MF <- enrichGO(gene = target_gene,
universe = detected_gene,
keyType = "ENSEMBL",
OrgDb = org.Mm.eg.db,
ont = "MF",
pAdjustMethod = "BH",
pvalueCutoff = p_value,
readable = TRUE)
ego_MF <- pairwise_termsim(ego_MF)
# Output results from GO analysis to a table
cluster_summary_MF <- data.frame(ego_MF)
write.csv(cluster_summary_MF, "Result/GO/G12D-WT_GO analysis_MF.csv")
# Run GO enrichment analysis for cellular component
ego_CC <- enrichGO(gene = target_gene,
universe = detected_gene,
keyType = "ENSEMBL",
OrgDb = org.Mm.eg.db,
ont = "CC",
pAdjustMethod = "BH",
pvalueCutoff = p_value,
readable = TRUE)
ego_CC <- pairwise_termsim(ego_CC)
# Output results from GO analysis to a table
cluster_summary_CC <- data.frame(ego_CC)
write.csv(cluster_summary_CC, "Result/GO/G12D-WT_GO analysis_CC.csv")
dotplot(ego_BP, showCategory=25)
emapplot(ego_BP, showCategory = 50)
## Warning: ggrepel: 30 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
dotplot(ego_MF, showCategory=25)
emapplot(ego_MF, showCategory = 50)
dotplot(ego_CC, showCategory=25)
emapplot(ego_CC, showCategory = 50)
target_gene <- dif_analysis %>% dplyr::filter(Ensembl_ID %in% sig_dif) %>% dplyr::select(entrezgene_id) %>% unlist() %>% as.character()
detected_gene <- dif_analysis %>% dplyr::select(entrezgene_id) %>% unlist() %>% as.character()
kk <- enrichKEGG(gene = target_gene,
universe = detected_gene,
organism = 'mmu',
keyType = "kegg",
pAdjustMethod = "BH",
pvalueCutoff = p_value)
## Reading KEGG annotation online: "https://rest.kegg.jp/link/mmu/pathway"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/mmu"...
kk.result <- as.data.frame(kk)
dir.create("Result/KEGG", showWarnings = FALSE)
write.csv(kk.result, "Result/KEGG/G12D-WT_KEGG analysis.csv")
dotplot(kk, showCategory=25)
load("Data/mouse_H_v5p2.rdata")
pathways <- Mm.H
gseadata <- dif_analysis$entrezgene_id
ranks <- dif_analysis$`G12D-WT_s2n`
names(ranks) <- dif_analysis$entrezgene_id
ranks <- sort(ranks, decreasing = T)
fgseaRes <- fgsea(pathways, ranks, minSize=15, maxSize = 500)
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.
fgseaRes <- fgseaRes[fgseaRes$padj < p_value,]
suppressMessages(fgseaRes[, leadingEdge := lapply(leadingEdge, mapIds, x=org.Mm.eg.db, keytype="ENTREZID", column="SYMBOL")])
fgseaRes$leadingEdge <- as.character(fgseaRes$leadingEdge)
fgseaRes <- fgseaRes[order(-fgseaRes$NES), ]
dir.create("Result/GSEA", showWarnings = FALSE)
write.csv(as.data.frame(fgseaRes), "Result/GSEA/G12D-WT_Sig_GSEA_Hallmark.csv")
grid.newpage()
plotGseaTable(pathways[fgseaRes$pathway], ranks, fgseaRes, gseaParam = 0.5)
dif_analysis_master <- dif_analysis
countFiles_ss <- countFiles[c(1:6)]
txi.salmon <- tximport(countFiles_ss, type = "salmon", tx2gene = tx2gene, ignoreTxVersion = TRUE)
## reading in files with read_tsv
## 1 2 3 4 5 6
## transcripts missing from tx2gene: 30916
## summarizing abundance
## summarizing counts
## summarizing length
DESeqsampletable_ss <- DESeqsampletable[c(1:6),]
ddsHTSeq<- DESeqDataSetFromTximport(txi.salmon, DESeqsampletable_ss, ~ condition)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## using counts and average transcript lengths from tximport
raw_ct <- DESeq2::counts(ddsHTSeq)
norm_factor <- size_factor[rownames(size_factor) %in% rownames(raw_ct), c(1:6)]
normalizationFactors(ddsHTSeq) <- norm_factor
ddsHTSeq_norm <- DESeq(ddsHTSeq)
## using pre-existing normalization factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
ddsHTSeq_analysis <- lfcShrink(ddsHTSeq_norm, coef = "condition_experimental_vs_control", type = "apeglm")
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
DESeq2::plotMA(ddsHTSeq_analysis)
ddsHTSeq_transform <- varianceStabilizingTransformation(ddsHTSeq_norm)
rawCountTable_transform <- as.data.frame(assay(ddsHTSeq_transform))
pseudoCount_transform = log2(rawCountTable_transform + 1)
mat.dist = pseudoCount_transform
mat.dist = as.matrix(dist(t(mat.dist)))
mat.dist = mat.dist/max(mat.dist)
png('Figure/Hierchical_Clustering_G12DK104Q_vs_G12D.png')
cim(mat.dist, symkey = FALSE, margins = c(6, 6))
suppressMessages(dev.off())
## quartz_off_screen
## 2
include_graphics("Figure/Hierchical_Clustering_G12DK104Q_vs_G12D.png")
plotPCA(ddsHTSeq_transform, intgroup = "kras", ntop = 500)
dif_analysis <- as.data.frame(ddsHTSeq_analysis)
dif_analysis <- dif_analysis[rownames(dif_analysis) %in% keep_genes, ]
colnames(dif_analysis) <- paste0("G12DK104Q-G12D_", colnames(dif_analysis))
dif_analysis <- rownames_to_column(dif_analysis, var = "Ensembl_ID")
rawCountTable <- as.data.frame(DESeq2::counts(ddsHTSeq_norm, normalize = TRUE))
detected_rawCountTable <- rawCountTable[rownames(rawCountTable) %in% keep_genes,]
dif_analysis$G12D_mean <- rowMeans(detected_rawCountTable[,1:3])
dif_analysis$G12DK104Q_mean <- rowMeans(detected_rawCountTable[,4:6])
dif_analysis$'G12DK104Q-G12D_s2n' <- apply(detected_rawCountTable,1,s2n,cond_1 = c(4:6),cond_2 = c(1:3))
suppressMessages(library(mosaic))
rawCountTable_transform_detected <- rawCountTable_transform[rownames(rawCountTable_transform) %in% keep_genes,]
sig_dif <- dif_analysis %>% dplyr::filter(`G12DK104Q-G12D_padj`< p_value) %>% dplyr::filter(`G12DK104Q-G12D_log2FoldChange` > LFC_CUTOFF | `G12DK104Q-G12D_log2FoldChange` < -LFC_CUTOFF) %>% dplyr::select(Ensembl_ID) %>% unique() %>% unlist()
sig_count <- rawCountTable_transform_detected[rownames(rawCountTable_transform_detected) %in% sig_dif, ]
sig_count_z <- t(apply(sig_count,1,zscore))
col_annot <- DESeqsampletable_ss[,c(1,3)]
colnames(col_annot) <- c("Condition", "Kras_status")
p <- pheatmap(sig_count_z, show_rownames = FALSE, annotation_col = col_annot, cluster_cols = FALSE)
ggsave(filename = "Heatmap_G12DK104Q_vs_G12D.png", p, width = 6, height = 10, path = "Figure")
include_graphics("Figure/Heatmap_G12DK104Q_vs_G12D.png")
# Volcano Plot
ggplot(dif_analysis, aes(x = `G12DK104Q-G12D_log2FoldChange`, y = -log(`G12DK104Q-G12D_padj`,10))) +
xlab("LFC") + ylab("-log10(P value)") +
geom_point(data = dif_analysis, alpha = 0.5, size = 1, color = "black") +
geom_point(data = subset(dif_analysis, `G12DK104Q-G12D_padj` < p_value & `G12DK104Q-G12D_log2FoldChange` > LFC_CUTOFF), alpha = 0.5, size = 1, color = "red") +
geom_point(data = subset(dif_analysis, `G12DK104Q-G12D_padj` < p_value & `G12DK104Q-G12D_log2FoldChange` < -LFC_CUTOFF), alpha = 0.5, size = 1, color = "blue") +
geom_hline(yintercept = -log(p_value,10)) +
geom_vline(xintercept = c(-LFC_CUTOFF,LFC_CUTOFF)) +
labs(title = "G12DK104Q vs G12D Volcano Plot")
## Warning: Removed 77 rows containing missing values (`geom_point()`).
There are only 11 genes used here as input so I’m not expecting a ton of interesting things coming out of this.
Classic GO analysis is performed here for all DE genes detected in this dataset. The reference list is list of genes detected in RNASeq. Three categories of GO terms are tested here, including biological process, molecular function and cellular component.
target_gene <- sig_dif
detected_gene <- rownames(detected_rawCountTable)
# Run GO enrichment analysis for biological process
ego_BP <- enrichGO(gene = target_gene,
universe = detected_gene,
keyType = "ENSEMBL",
OrgDb = org.Mm.eg.db,
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = p_value,
readable = TRUE)
# ego_BP <- pairwise_termsim(ego_BP)
# No enrichment found
# Output results from GO analysis to a table
# cluster_summary_BP <- data.frame(ego_BP)
# dir.create("Result/GO", showWarnings = F)
# write.csv(cluster_summary_BP, "Result/GO/G12DK104Q-G12D_GO analysis_BP.csv")
# Run GO enrichment analysis for molecular function
ego_MF <- enrichGO(gene = target_gene,
universe = detected_gene,
keyType = "ENSEMBL",
OrgDb = org.Mm.eg.db,
ont = "MF",
pAdjustMethod = "BH",
pvalueCutoff = p_value,
readable = TRUE)
ego_MF <- pairwise_termsim(ego_MF)
# Output results from GO analysis to a table
cluster_summary_MF <- data.frame(ego_MF)
write.csv(cluster_summary_MF, "Result/GO/G12DK104Q-G12D_GO analysis_MF.csv")
# Run GO enrichment analysis for cellular component
ego_CC <- enrichGO(gene = target_gene,
universe = detected_gene,
keyType = "ENSEMBL",
OrgDb = org.Mm.eg.db,
ont = "CC",
pAdjustMethod = "BH",
pvalueCutoff = p_value,
readable = TRUE)
ego_CC <- pairwise_termsim(ego_CC)
# Output results from GO analysis to a table
cluster_summary_CC <- data.frame(ego_CC)
write.csv(cluster_summary_CC, "Result/GO/G12DK104Q-G12D_GO analysis_CC.csv")
# dotplot(ego_BP, showCategory=25)
# emapplot(ego_BP, showCategory = 50)
# dotplot(ego_MF, showCategory=25)
# emapplot(ego_MF, showCategory = 50)
# dotplot(ego_CC, showCategory=25)
# emapplot(ego_CC, showCategory = 50)
target_gene <- dif_analysis_master %>% dplyr::filter(Ensembl_ID %in% sig_dif) %>% dplyr::select(entrezgene_id) %>% unlist() %>% as.character()
detected_gene <- dif_analysis_master %>% dplyr::select(entrezgene_id) %>% unlist() %>% as.character()
kk <- enrichKEGG(gene = target_gene,
universe = detected_gene,
organism = 'mmu',
keyType = "kegg",
pAdjustMethod = "BH",
pvalueCutoff = p_value)
kk.result <- as.data.frame(kk)
dir.create("Result/KEGG", showWarnings = FALSE)
write.csv(kk.result, "Result/KEGG/G12DK104Q-G12D_KEGG analysis.csv")
# dotplot(kk, showCategory=25)
gseadata <- dif_analysis_master$entrezgene_id
ranks <- dif_analysis$`G12DK104Q-G12D_s2n`
names(ranks) <- dif_analysis_master$entrezgene_id
ranks <- sort(ranks, decreasing = T)
fgseaRes <- fgsea(pathways, ranks, minSize=15, maxSize = 500)
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.
fgseaRes <- fgseaRes[fgseaRes$padj < p_value,]
suppressMessages(fgseaRes[, leadingEdge := lapply(leadingEdge, mapIds, x=org.Mm.eg.db, keytype="ENTREZID", column="SYMBOL")])
fgseaRes$leadingEdge <- as.character(fgseaRes$leadingEdge)
fgseaRes <- fgseaRes[order(-fgseaRes$NES), ]
dir.create("Result/GSEA", showWarnings = FALSE)
write.csv(as.data.frame(fgseaRes), "Result/GSEA/G12DK104Q-G12D_Sig_GSEA_Hallmark.csv")
grid.newpage()
plotGseaTable(pathways[fgseaRes$pathway], ranks, fgseaRes, gseaParam = 0.5)
dif_analysis_master <- left_join(dif_analysis_master, dif_analysis, by = c('Ensembl_ID'='Ensembl_ID'))
countFiles_ss <- countFiles[c(4:9)]
txi.salmon <- tximport(countFiles_ss, type = "salmon", tx2gene = tx2gene, ignoreTxVersion = TRUE)
## reading in files with read_tsv
## 1 2 3 4 5 6
## transcripts missing from tx2gene: 30916
## summarizing abundance
## summarizing counts
## summarizing length
DESeqsampletable_ss <- DESeqsampletable[c(4:9),]
DESeqsampletable_ss$condition <- c(rep("experimental", 3), rep("control", 3))
ddsHTSeq<- DESeqDataSetFromTximport(txi.salmon, DESeqsampletable_ss, ~ condition)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## using counts and average transcript lengths from tximport
raw_ct <- DESeq2::counts(ddsHTSeq)
norm_factor <- size_factor[rownames(size_factor) %in% rownames(raw_ct), c(4:9)]
normalizationFactors(ddsHTSeq) <- norm_factor
ddsHTSeq_norm <- DESeq(ddsHTSeq)
## using pre-existing normalization factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
ddsHTSeq_analysis <- lfcShrink(ddsHTSeq_norm, coef = "condition_experimental_vs_control", type = "apeglm")
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
DESeq2::plotMA(ddsHTSeq_analysis)
ddsHTSeq_transform <- varianceStabilizingTransformation(ddsHTSeq_norm)
rawCountTable_transform <- as.data.frame(assay(ddsHTSeq_transform))
pseudoCount_transform = log2(rawCountTable_transform + 1)
mat.dist = pseudoCount_transform
mat.dist = as.matrix(dist(t(mat.dist)))
mat.dist = mat.dist/max(mat.dist)
png('Figure/Hierchical_Clustering_G12DK104Q_vs_WT.png')
cim(mat.dist, symkey = FALSE, margins = c(6, 6))
suppressMessages(dev.off())
## quartz_off_screen
## 2
include_graphics("Figure/Hierchical_Clustering_G12DK104Q_vs_WT.png")
plotPCA(ddsHTSeq_transform, intgroup = "kras", ntop = 500)
dif_analysis <- as.data.frame(ddsHTSeq_analysis)
dif_analysis <- dif_analysis[rownames(dif_analysis) %in% keep_genes, ]
colnames(dif_analysis) <- paste0("G12DK104Q-WT_", colnames(dif_analysis))
dif_analysis <- rownames_to_column(dif_analysis, var = "Ensembl_ID")
rawCountTable <- as.data.frame(DESeq2::counts(ddsHTSeq_norm, normalize = TRUE))
detected_rawCountTable <- rawCountTable[rownames(rawCountTable) %in% keep_genes,]
dif_analysis$WT_mean <- rowMeans(detected_rawCountTable[,4:6])
dif_analysis$G12DK104Q_mean <- rowMeans(detected_rawCountTable[,1:3])
dif_analysis$'G12DK104Q-WT_s2n' <- apply(detected_rawCountTable,1,s2n,cond_1 = c(1:3),cond_2 = c(4:6))
suppressMessages(library(mosaic))
rawCountTable_transform_detected <- rawCountTable_transform[rownames(rawCountTable_transform) %in% keep_genes,]
sig_dif <- dif_analysis %>% dplyr::filter(`G12DK104Q-WT_padj`< p_value) %>% dplyr::filter(`G12DK104Q-WT_log2FoldChange` > LFC_CUTOFF | `G12DK104Q-WT_log2FoldChange` < -LFC_CUTOFF) %>% dplyr::select(Ensembl_ID) %>% unique() %>% unlist()
sig_count <- rawCountTable_transform_detected[rownames(rawCountTable_transform_detected) %in% sig_dif, ]
sig_count_z <- t(apply(sig_count,1,zscore))
col_annot <- DESeqsampletable_ss[,c(1,3)]
colnames(col_annot) <- c("Condition", "Kras_status")
p <- pheatmap(sig_count_z, show_rownames = FALSE, annotation_col = col_annot, cluster_cols = FALSE)
ggsave(filename = "Heatmap_G12DK104Q_vs_WT.png", p, width = 6, height = 10, path = "Figure")
include_graphics("Figure/Heatmap_G12DK104Q_vs_WT.png")
# Volcano Plot
ggplot(dif_analysis, aes(x = `G12DK104Q-WT_log2FoldChange`, y = -log(`G12DK104Q-WT_padj`,10))) +
xlab("LFC") + ylab("-log10(P value)") +
geom_point(data = dif_analysis, alpha = 0.5, size = 1, color = "black") +
geom_point(data = subset(dif_analysis, `G12DK104Q-WT_padj` < p_value & `G12DK104Q-WT_log2FoldChange` > LFC_CUTOFF), alpha = 0.5, size = 1, color = "red") +
geom_point(data = subset(dif_analysis, `G12DK104Q-WT_padj` < p_value & `G12DK104Q-WT_log2FoldChange` < -LFC_CUTOFF), alpha = 0.5, size = 1, color = "blue") +
geom_hline(yintercept = -log(p_value,10)) +
geom_vline(xintercept = c(-LFC_CUTOFF,LFC_CUTOFF)) +
labs(title = "G12DK104Q vs WT Volcano Plot")
## Warning: Removed 73 rows containing missing values (`geom_point()`).
There are only 11 genes used here as input so I’m not expecting a ton of interesting things coming out of this.
Classic GO analysis is performed here for all DE genes detected in this dataset. The reference list is list of genes detected in RNASeq. Three categories of GO terms are tested here, including biological process, molecular function and cellular component.
target_gene <- sig_dif
detected_gene <- rownames(detected_rawCountTable)
# Run GO enrichment analysis for biological process
ego_BP <- enrichGO(gene = target_gene,
universe = detected_gene,
keyType = "ENSEMBL",
OrgDb = org.Mm.eg.db,
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = p_value,
readable = TRUE)
ego_BP <- pairwise_termsim(ego_BP)
# Output results from GO analysis to a table
cluster_summary_BP <- data.frame(ego_BP)
dir.create("Result/GO", showWarnings = F)
write.csv(cluster_summary_BP, "Result/GO/G12DK104Q-WT_GO analysis_BP.csv")
# Run GO enrichment analysis for molecular function
ego_MF <- enrichGO(gene = target_gene,
universe = detected_gene,
keyType = "ENSEMBL",
OrgDb = org.Mm.eg.db,
ont = "MF",
pAdjustMethod = "BH",
pvalueCutoff = p_value,
readable = TRUE)
ego_MF <- pairwise_termsim(ego_MF)
# Output results from GO analysis to a table
cluster_summary_MF <- data.frame(ego_MF)
write.csv(cluster_summary_MF, "Result/GO/G12DK104Q-WT_GO analysis_MF.csv")
# Run GO enrichment analysis for cellular component
ego_CC <- enrichGO(gene = target_gene,
universe = detected_gene,
keyType = "ENSEMBL",
OrgDb = org.Mm.eg.db,
ont = "CC",
pAdjustMethod = "BH",
pvalueCutoff = p_value,
readable = TRUE)
ego_CC <- pairwise_termsim(ego_CC)
# Output results from GO analysis to a table
cluster_summary_CC <- data.frame(ego_CC)
write.csv(cluster_summary_CC, "Result/GO/G12DK104Q-WT_GO analysis_CC.csv")
dotplot(ego_BP, showCategory=25)
emapplot(ego_BP, showCategory = 50)
dotplot(ego_MF, showCategory=25)
emapplot(ego_MF, showCategory = 50)
dotplot(ego_CC, showCategory=25)
emapplot(ego_CC, showCategory = 50)
target_gene <- dif_analysis_master %>% dplyr::filter(Ensembl_ID %in% sig_dif) %>% dplyr::select(entrezgene_id) %>% unlist() %>% as.character()
detected_gene <- dif_analysis_master %>% dplyr::select(entrezgene_id) %>% unlist() %>% as.character()
kk <- enrichKEGG(gene = target_gene,
universe = detected_gene,
organism = 'mmu',
keyType = "kegg",
pAdjustMethod = "BH",
pvalueCutoff = p_value)
kk.result <- as.data.frame(kk)
#dir.create("Result/KEGG", showWarnings = FALSE)
#write.csv(kk.result, "Result/KEGG/G12DK104Q-WT_KEGG analysis.csv")
# dotplot(kk, showCategory=25)
gseadata <- dif_analysis_master$entrezgene_id
ranks <- dif_analysis$`G12DK104Q-WT_s2n`
names(ranks) <- dif_analysis_master$entrezgene_id
ranks <- sort(ranks, decreasing = T)
fgseaRes <- fgsea(pathways, ranks, minSize=15, maxSize = 500)
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.
fgseaRes <- fgseaRes[fgseaRes$padj < p_value,]
suppressMessages(fgseaRes[, leadingEdge := lapply(leadingEdge, mapIds, x=org.Mm.eg.db, keytype="ENTREZID", column="SYMBOL")])
fgseaRes$leadingEdge <- as.character(fgseaRes$leadingEdge)
fgseaRes <- fgseaRes[order(-fgseaRes$NES), ]
dir.create("Result/GSEA", showWarnings = FALSE)
write.csv(as.data.frame(fgseaRes), "Result/GSEA/G12DK104Q-WT_Sig_GSEA_Hallmark.csv")
grid.newpage()
plotGseaTable(pathways[fgseaRes$pathway], ranks, fgseaRes, gseaParam = 0.5)
dif_analysis_master <- left_join(dif_analysis_master, dif_analysis, by = c('Ensembl_ID'='Ensembl_ID'))
write_csv(dif_analysis_master, "Result/Differential_analysis.csv")
G12D_WT_DEG <- dif_analysis_master %>% dplyr::filter(`G12D-WT_padj`< p_value) %>% dplyr::filter(`G12D-WT_log2FoldChange` > LFC_CUTOFF | `G12D-WT_log2FoldChange` < -LFC_CUTOFF) %>% dplyr::select(Ensembl_ID) %>% unique() %>% unlist()
G12DK104Q_G12D_DEG <- dif_analysis_master %>% dplyr::filter(`G12DK104Q-G12D_padj`< p_value) %>% dplyr::filter(`G12DK104Q-G12D_log2FoldChange` > LFC_CUTOFF | `G12DK104Q-G12D_log2FoldChange` < -LFC_CUTOFF) %>% dplyr::select(Ensembl_ID) %>% unique() %>% unlist()
overlap <- intersect(G12D_WT_DEG, G12DK104Q_G12D_DEG)
grid.newpage()
v <- draw.pairwise.venn(length(G12D_WT_DEG),
length(G12DK104Q_G12D_DEG),
length(overlap),
catergory <- c("G12D_WT_DEG",
"G12DK104Q_G12D_DEG"),
lty = "blank",
ex.text = FALSE,
fill = c("pink", "lightblue"),
cat.pos = c(250, 135), cat.dist = 0.1, margin = 0.1,
fontfamily = "sans", cat.fontfamily = "sans")
ggsave(filename = "Venn_G12DK104Q_vs_G12D overlap with G12D_vs_WT.pdf", v, width = 6, height = 4, path = "Figure", device = "pdf")
overlap_count <- rawCountTable_transform_all[rownames(rawCountTable_transform_all) %in% overlap, ]
overlap_count_z <- t(apply(overlap_count,1,zscore))
overlap_count_z <- overlap_count_z[,c(7:9, 1:6)]
col_annot <- DESeqsampletable[,c(3,4)]
colnames(col_annot) <- c("Kras_status", "Gender")
p <- pheatmap(overlap_count_z, show_rownames = FALSE, annotation_col = col_annot, cluster_cols = FALSE)
ggsave(filename = "Heatmap_G12DK104Q_vs_G12D overlap with G12D_vs_WT.pdf", p, width = 6, height = 4, path = "Figure", device = "pdf")
# include_graphics("Figure/Heatmap_G12DK104Q_vs_G12D overlap with G12D_vs_WT.pdf")
G12D_WT_DEG <- dif_analysis_master %>% dplyr::filter(`G12D-WT_padj`< p_value) %>% dplyr::filter(`G12D-WT_log2FoldChange` > LFC_CUTOFF | `G12D-WT_log2FoldChange` < -LFC_CUTOFF) %>% dplyr::select(Ensembl_ID) %>% unique() %>% unlist()
G12DK104Q_WT_DEG <- dif_analysis_master %>% dplyr::filter(`G12DK104Q-WT_padj`< p_value) %>% dplyr::filter(`G12DK104Q-WT_log2FoldChange` > LFC_CUTOFF | `G12DK104Q-WT_log2FoldChange` < -LFC_CUTOFF) %>% dplyr::select(Ensembl_ID) %>% unique() %>% unlist()
overlap <- intersect(G12D_WT_DEG, G12DK104Q_WT_DEG)
grid.newpage()
v <- draw.pairwise.venn(length(G12D_WT_DEG),
length(G12DK104Q_WT_DEG),
length(overlap),
catergory <- c("G12D_WT_DEG",
"G12DK104Q_WT_DEG"),
lty = "blank",
ex.text = FALSE,
fill = c("pink", "lightblue"),
cat.pos = c(250, 135), cat.dist = 0.1, margin = 0.1,
fontfamily = "sans", cat.fontfamily = "sans")
ggsave(filename = "Venn_G12DK104Q_vs_WT overlap with G12D_vs_WT.pdf", v, width = 6, height = 4, path = "Figure", device = "pdf")
overlap_count <- rawCountTable_transform_all[rownames(rawCountTable_transform_all) %in% overlap, ]
overlap_count_z <- t(apply(overlap_count,1,zscore))
overlap_count_z <- overlap_count_z[,c(7:9, 1:6)]
col_annot <- DESeqsampletable[,c(3,4)]
colnames(col_annot) <- c("Kras_status", "Gender")
p <- pheatmap(overlap_count_z, show_rownames = FALSE, annotation_col = col_annot, cluster_cols = FALSE)
ggsave(filename = "Heatmap_G12DK104Q_vs_WT overlap with G12D_vs_WT.pdf", p, width = 6, height = 8, path = "Figure", device = "pdf")
# include_graphics("Figure/Heatmap_G12DK104Q_vs_WT overlap with G12D_vs_WT.pdf")
G12DK104Q_WT_DEG <- dif_analysis_master %>% dplyr::filter(`G12DK104Q-WT_padj`< p_value) %>% dplyr::filter(`G12DK104Q-WT_log2FoldChange` > LFC_CUTOFF | `G12DK104Q-WT_log2FoldChange` < -LFC_CUTOFF) %>% dplyr::select(Ensembl_ID) %>% unique() %>% unlist()
overlap_count <- rawCountTable_transform_all[rownames(rawCountTable_transform_all) %in% G12DK104Q_WT_DEG, ]
overlap_count_z <- t(apply(overlap_count,1,zscore))
overlap_count_z <- overlap_count_z[,c(7:9, 1:6)]
col_annot <- DESeqsampletable[,c(3,4)]
colnames(col_annot) <- c("Kras_status", "Gender")
p <- pheatmap(overlap_count_z, show_rownames = FALSE, annotation_col = col_annot, cluster_cols = FALSE)
ggsave(filename = "Heatmap_G12DK104Q_vs_WT with all samples.pdf", p, width = 6, height = 8, path = "Figure", device = "pdf")
sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Monterey 12.6.3
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] grid stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] mosaic_1.8.4.2 mosaicData_0.20.3
## [3] ggformula_0.10.4 Matrix_1.5-4
## [5] enrichplot_1.18.4 pheatmap_1.0.12
## [7] fgsea_1.24.0 biomaRt_2.54.1
## [9] knitr_1.42 gtools_3.9.4
## [11] apeglm_1.20.0 pathview_1.38.0
## [13] org.Mm.eg.db_3.16.0 DOSE_3.25.0.002
## [15] clusterProfiler_4.7.1.002 VennDiagram_1.7.3
## [17] futile.logger_1.4.3 RColorBrewer_1.1-3
## [19] gplots_3.1.3 mixOmics_6.22.0
## [21] MASS_7.3-58.3 reshape_0.8.9
## [23] lattice_0.21-8 EnsDb.Mmusculus.v79_2.99.0
## [25] ensembldb_2.22.0 AnnotationFilter_1.22.0
## [27] GenomicFeatures_1.50.4 AnnotationDbi_1.60.2
## [29] gridExtra_2.3 lubridate_1.9.2
## [31] forcats_1.0.0 stringr_1.5.0
## [33] dplyr_1.1.2 purrr_1.0.1
## [35] readr_2.1.4 tidyr_1.3.0
## [37] tibble_3.2.1 ggplot2_3.4.2
## [39] tidyverse_2.0.0 tximport_1.26.1
## [41] DESeq2_1.38.3 SummarizedExperiment_1.28.0
## [43] Biobase_2.58.0 MatrixGenerics_1.10.0
## [45] matrixStats_0.63.0 GenomicRanges_1.50.2
## [47] GenomeInfoDb_1.34.9 IRanges_2.32.0
## [49] S4Vectors_0.36.2 BiocGenerics_0.44.0
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.3 ggstance_0.3.6 tidyselect_1.2.0
## [4] RSQLite_2.3.1 BiocParallel_1.32.6 scatterpie_0.1.8
## [7] munsell_0.5.0 ragg_1.2.5 codetools_0.2-19
## [10] withr_2.5.0 colorspace_2.1-0 GOSemSim_2.24.0
## [13] filelock_1.0.2 highr_0.10 rstudioapi_0.14
## [16] labeling_0.4.2 KEGGgraph_1.58.3 bbmle_1.0.25
## [19] GenomeInfoDbData_1.2.9 polyclip_1.10-4 bit64_4.0.5
## [22] farver_2.1.1 downloader_0.4 coda_0.19-4
## [25] vctrs_0.6.2 treeio_1.22.0 generics_0.1.3
## [28] lambda.r_1.2.4 gson_0.1.0 xfun_0.39
## [31] timechange_0.2.0 BiocFileCache_2.6.1 R6_2.5.1
## [34] graphlayouts_0.8.4 locfit_1.5-9.7 bitops_1.0-7
## [37] cachem_1.0.7 gridGraphics_0.5-1 DelayedArray_0.24.0
## [40] vroom_1.6.1 BiocIO_1.8.0 scales_1.2.1
## [43] ggraph_2.1.0 gtable_0.3.3 tidygraph_1.2.3
## [46] rlang_1.1.0 systemfonts_1.0.4 splines_4.2.2
## [49] rtracklayer_1.58.0 lazyeval_0.2.2 mosaicCore_0.9.2.1
## [52] yaml_2.3.7 reshape2_1.4.4 qvalue_2.30.0
## [55] tools_4.2.2 ggplotify_0.1.0 jquerylib_0.1.4
## [58] ggridges_0.5.4 Rcpp_1.0.10 plyr_1.8.8
## [61] progress_1.2.2 zlibbioc_1.44.0 RCurl_1.98-1.12
## [64] prettyunits_1.1.1 viridis_0.6.2 cowplot_1.1.1
## [67] haven_2.5.2 ggrepel_0.9.3 magrittr_2.0.3
## [70] data.table_1.14.8 RSpectra_0.16-1 futile.options_1.0.1
## [73] mvtnorm_1.1-3 ggnewscale_0.4.8 ProtGenerics_1.30.0
## [76] hms_1.1.3 patchwork_1.1.2 evaluate_0.20
## [79] xtable_1.8-4 HDO.db_0.99.1 XML_3.99-0.14
## [82] emdbook_1.3.12 bdsmatrix_1.3-6 compiler_4.2.2
## [85] ellipse_0.4.5 KernSmooth_2.23-20 crayon_1.5.2
## [88] shadowtext_0.1.2 htmltools_0.5.5 ggfun_0.0.9
## [91] corpcor_1.6.10 tzdb_0.3.0 geneplotter_1.76.0
## [94] aplot_0.1.10 DBI_1.1.3 tweenr_2.0.2
## [97] formatR_1.14 dbplyr_2.3.2 rappdirs_0.3.3
## [100] cli_3.6.1 parallel_4.2.2 igraph_1.4.2
## [103] pkgconfig_2.0.3 GenomicAlignments_1.34.1 numDeriv_2016.8-1.1
## [106] xml2_1.3.3 ggtree_3.6.2 rARPACK_0.11-0
## [109] annotate_1.76.0 bslib_0.4.2 XVector_0.38.0
## [112] yulab.utils_0.0.6 digest_0.6.31 graph_1.76.0
## [115] Biostrings_2.66.0 rmarkdown_2.21 fastmatch_1.1-3
## [118] tidytree_0.4.2 restfulr_0.0.15 curl_5.0.0
## [121] Rsamtools_2.14.0 rjson_0.2.21 lifecycle_1.0.3
## [124] nlme_3.1-162 jsonlite_1.8.4 viridisLite_0.4.1
## [127] fansi_1.0.4 labelled_2.11.0 pillar_1.9.0
## [130] KEGGREST_1.38.0 fastmap_1.1.1 httr_1.4.5
## [133] GO.db_3.16.0 glue_1.6.2 png_0.1-8
## [136] bit_4.0.5 Rgraphviz_2.42.0 ggforce_0.4.1
## [139] stringi_1.7.12 sass_0.4.5 blob_1.2.4
## [142] textshaping_0.3.6 org.Hs.eg.db_3.16.0 caTools_1.18.2
## [145] memoise_2.0.1 ape_5.7-1